/* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
package net.nutch.fetcher;
/**
 * Integer codes shared by the fetcher: miscellaneous logging codes,
 * non-retryable failure reasons, retryable error reasons, and output
 * status codes.
 *
 * <p>Each group is numbered contiguously from <code>0</code> and is
 * accompanied by a <code>NUM_*</code> constant holding the size of the
 * group. Fields of an interface are implicitly
 * <code>public static final</code>, so the modifiers are not repeated
 * on each declaration.
 */
public interface FetcherConstants {

  // ------------------------------------------------------------------
  // Miscellaneous logging codes: [0 .. NUM_MISC_CODES - 1]
  // ------------------------------------------------------------------

  /** Misc logging code: stats dumps. */
  int MISC_STATS = 0;

  /** Misc logging code: logging code key. */
  int MISC_KEY = 1;

  /** Misc logging code: robots.txt forbidden events. */
  int MISC_ROBOTS_FORBIDDEN = 2;

  /** Misc logging code: META robots/noindex events. */
  int MISC_META_NOINDEX = 3;

  /** Misc logging code: META robots/nofollow events. */
  int MISC_META_NOFOLLOW = 4;

  /** Misc logging code: META robots/nocache events. */
  int MISC_META_NOCACHE = 5;

  /** Misc logging code: successful fetches. */
  int MISC_FETCH_SUCCESS = 6;

  /** Misc logging code: random informational messages. */
  int MISC_INFORMATIONAL = 7;

  /**
   * How many distinct misc codes are tracked; misc codes run from
   * <code>0</code> through <code>NUM_MISC_CODES - 1</code>.
   */
  int NUM_MISC_CODES = MISC_INFORMATIONAL + 1;

  // ------------------------------------------------------------------
  // Failure reasons: non-retryable conditions that prevent a fetch
  // (404, etc). Values run [0 .. NUM_FAIL_REASONS - 1].
  // ------------------------------------------------------------------

  /** Catch-all failure reason. */
  int FAIL_UNKNOWN = 0;

  /** The URL (or a redirect URL) was invalid. */
  int FAIL_BAD_URL = 1;

  /** A robots.txt file forbids us from accessing this URL. */
  int FAIL_ROBOTS_EXCLUDED = 2;

  /** The maximum number of failed attempts has been reached. */
  int FAIL_TOO_MANY_ERRORS = 3;

  /** The maximum number of redirects has been reached. */
  int FAIL_TOO_MANY_REDIRECTS = 4;

  /** Got a 3xx status code, but no new location. */
  int FAIL_REDIRECT_MISSING_TARGET = 5;

  /** Page does not exist (404/not found). */
  int FAIL_NOT_FOUND = 6;

  /** Got a 4xx/Forbidden response code. */
  int FAIL_FORBIDDEN = 7;

  /** Found a redirect loop. */
  int FAIL_REDIRECT_LOOP_DETECTED = 8;

  /** Hostname matches a ban pattern. */
  int FAIL_HOSTNAME_BANNED = 9;

  /** Host declared dead after too many failed fetches. */
  int FAIL_DEAD_HOST = 10;

  /** The HTTP response code was not recognized. */
  int FAIL_UNKNOWN_RESP_CODE = 11;

  /** Unknown host. */
  int FAIL_UNKNOWN_HOST = 12;

  /** Connection refused. */
  int FAIL_CONNECTION_REFUSED = 13;

  /**
   * How many distinct failure codes are tracked; failure codes run from
   * <code>0</code> through <code>NUM_FAIL_REASONS - 1</code>.
   */
  int NUM_FAIL_REASONS = FAIL_CONNECTION_REFUSED + 1;

  // ------------------------------------------------------------------
  // Error reasons: retryable conditions explaining why the page was
  // not obtained on this try. Values run [0 .. NUM_ERR_REASONS - 1].
  // ------------------------------------------------------------------

  /** Catch-all error reason. */
  int ERR_UNKNOWN = 0;

  /** Connection timed out. */
  int ERR_CONNECTION_TIMED_OUT = 1;

  /** Bad header line. */
  int ERR_BAD_HEADER_LINE = 2;

  /** Connection reset by peer. */
  int ERR_RESET_BY_PEER = 3;

  /** Bad status line. */
  int ERR_BAD_STATUS_LINE = 4;

  /** EOF encountered during read. */
  int ERR_EOF_DURING_READ = 5;

  /** No route to host. */
  int ERR_NO_ROUTE = 6;

  /** Socket timeout. */
  int ERR_SOCKET_TIMEOUT = 7;

  /** Network unreachable. */
  int ERR_NETWORK_UNREACHABLE = 8;

  /** Bad Content-Length header. */
  int ERR_BAD_CONTENT_LENGTH = 9;

  /** Error parsing chunk length. */
  int ERR_CHUNKLEN_PARSE = 10;

  /** EOF in chunk. */
  int ERR_CHUNK_EOF = 11;

  /** Error uncompressing content. */
  int ERR_DECOMPRESS = 12;

  /**
   * How many distinct error codes are tracked; error codes run from
   * <code>0</code> through <code>NUM_ERR_REASONS - 1</code>.
   */
  int NUM_ERR_REASONS = ERR_DECOMPRESS + 1;

  // ------------------------------------------------------------------
  // Output status codes: [0 .. NUM_OUT_STATUS - 1]
  // ------------------------------------------------------------------

  /** Output status code: success. */
  int OUT_OK = 0;

  /** Output status code: unknown failure. */
  int OUT_UNKNOWN = 1;

  /** Output status code: DOM parse failure. */
  int OUT_DOM_ERROR = 2;

  /** Output status code: unexpected DOM parse exception. */
  int OUT_DOM_EXCEPTION = 3;

  /** Output status code: unhandled content type. */
  int OUT_UNKNOWN_CONTENT = 4;

  /**
   * Output status code: character encoding problem
   * (i.e. sun.io.MalformedInputException).
   */
  int OUT_ENCODING_ERR = 5;

  /**
   * How many distinct output status codes are tracked; output status
   * codes run from <code>0</code> through <code>NUM_OUT_STATUS - 1</code>.
   */
  int NUM_OUT_STATUS = OUT_ENCODING_ERR + 1;
}